library(ManyEcoEvo)
## Warning: replacing previous import 'purrr::%@%' by 'rlang::%@%' when loading
## 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_lgl' by 'rlang::flatten_lgl'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::splice' by 'rlang::splice' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_chr' by 'rlang::flatten_chr'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_raw' by 'rlang::flatten_raw'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten' by 'rlang::flatten' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_dbl' by 'rlang::flatten_dbl'
## when loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::invoke' by 'rlang::invoke' when
## loading 'ManyEcoEvo'
## Warning: replacing previous import 'purrr::flatten_int' by 'rlang::flatten_int'
## when loading 'ManyEcoEvo'
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(rlang)
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
library(performance)
library(see)
library(modelbased)

Exploring Model Weights

Create orchard-style plots for both BT and Euc, without removing analyses with extreme weights:

inv_bc_var_weights <- ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         collinearity_subset == "All"
  ) %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda) %>% 
  mutate(predictor_means = 
           map(box_cox_rating_cat, modelbased::estimate_means),
         model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>% 
                            drop_na() %>% 
                            as_tibble()),
         plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
  mutate(model_data = map(model_data, 
                          .f = ~ mutate(.x, PublishableAsIs =
                                          str_replace(PublishableAsIs,
                                                      "publishable with ", "") %>%
                                          str_replace("deeply flawed and ", "") %>% 
                                          capwords())),
         predictor_means = map(predictor_means,
                               .f = ~ mutate(.x, PublishableAsIs =
                                               str_replace(PublishableAsIs,
                                                           "publishable with ", "") %>%
                                               str_replace("deeply flawed and ", "") %>% 
                                               capwords()))) 
## We selected `at = c("PublishableAsIs")`.
## We selected `at = c("PublishableAsIs")`.
inv_bc_var_weights %>% 
  mutate(plot_name = 
           str_remove(plot_name, "complete, ") %>% 
           str_replace_all(., " ", "_") %>% 
           paste0("_orchard_plot")) %>% 
  pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
       .f = ~ plot_model_means_orchard(..1, 
                                       PublishableAsIs, 
                                       ..2,
                                       new_order = 
                                         c("Unpublishable",
                                           "Major Revision",
                                           "Minor Revision",
                                           "Publishable As Is"),
                                       ..3))
## [[1]]

## 
## [[2]]

From the above it’s clear that the general pattern is for the higher weighted cases to pull the model means closer to them. In the case of the eucalyptus data, it’s several EXTREME weights that are pulling the model means closer to them. Let’s look at the distributions of weights and their values for the euc dataset Weight Distribution Blue Tit Model:

ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "blue tit") %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda, data) %>% pull(box_cox_rating_cat) %>% 
  pluck(1) %>% 
  model.frame() %>% 
  as_tibble() %>% 
  rename(weights = `(weights)`) %>% 
  mutate(weights = as.numeric(weights)) %>% 
  ggplot(aes(x = weights)) +
  geom_histogram(bins = 30) +
  labs(title = "Distribution of Box-Cox Weights",
       x = "Box-Cox Weights",
       y = "Frequency") +
  theme_minimal() +
  theme(legend.position = "none")

Weight Distribution Eucalyptus Model

ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda, data) %>% 
  pull(box_cox_rating_cat) %>% 
  pluck(1) %>% 
  model.frame() %>% 
  as_tibble() %>% 
  rename(weights = `(weights)`) %>% 
  mutate(weights = as.numeric(weights)) %>% 
  ggplot(aes(x = weights)) +
  geom_histogram(bins = 30) +
  labs(title = "Distribution of Box-Cox Weights",
       x = "Box-Cox Weights",
       y = "Frequency") +
  theme_minimal() +
  theme(legend.position = "none")

Weight Distribution for Eucalyptus model when case with maximum weight removed

ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda, data) %>% 
  pull(box_cox_rating_cat) %>% 
  pluck(1) %>% 
  model.frame() %>% 
  as_tibble() %>% 
  rename(weights = `(weights)`) %>% 
  mutate(weights = as.numeric(weights)) %>% 
  filter(weights != max(weights)) %>% 
  ggplot(aes(x = weights)) +
  geom_histogram(bins = 30) +
  labs(title = "Distribution of Box-Cox Weights",
       x = "Box-Cox Weights",
       y = "Frequency") +
  theme_minimal() +
  theme(legend.position = "none")

Let’s see all values of weight

ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda, data) %>% 
  pull(box_cox_rating_cat) %>% 
  pluck(1) %>% 
  model.frame() %>% 
  as_tibble() %>% 
  rename(weights = `(weights)`) %>% 
  mutate(weights = as.numeric(weights)) %>% 
  pluck("weights", unique, sort) # all weights
##  [1]    563686395    563717442    563749404    563792386    565399621
##  [6]    565829955    565935996    566268848    567997367    569333108
## [11]    574528639    575196234    575387867    575722251    577600776
## [16]    579285551    579948308    580899020    583278546    584152556
## [21]    585498693    588256091    589433325    591917249    592900308
## [26]    607544276    617833974    622796581    626556210    633218858
## [31]    634782083    639555963    645135519    675024708    679607659
## [36]    679917410    685839966    694551570    731532604    810363823
## [41]    841303906    848538448    872489778    883906458    889910295
## [46]    890972369    919288483    933683584    962902173   1041096068
## [51]   1105126672   1155109828   1175856985   1373998220   1553121885
## [56]   1652915383   1739127212   1859021802   1921555550   1932590106
## [61]   2160580342   2185971274   2284404564   3627724910   4623841569
## [66]   5082015565   5099533477   5127083966   5313360448   5637576773
## [71]   5749335304   5767498603   5986636683   6027266130   7006392797
## [76]   9979626599  11545920175 708161001050

What are the cases with extreme weights? Below we identify the cases with the top 2 maximum weights

extreme_weight_observation <- 
  ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  pluck("effects_analysis", 1) %>% 
  select(study_id, box_cox_abs_deviation_score_estimate, box_cox_var) %>% 
  mutate(weights = 1/box_cox_var) %>%
  # filter(weights == max(weights))
  filter(weights > 10000000000)
extreme_weight_observation
## # A tibble: 2 × 4
##   study_id       box_cox_abs_deviation_score_estimate box_cox_var       weights
##   <chr>                                         <dbl>       <dbl>         <dbl>
## 1 Brooklyn-2-2-1                                 1.48    1.41e-12 708161001050.
## 2 Cassilis-1-1-1                                -1.11    8.66e-11  11545920175.

Investigating the impact of extreme weights

Remove Eucalyptus cases with extreme weights and refit / plot models

refitted_eucalyptus_model <- 
  ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  mutate(effects_analysis = map(effects_analysis, ~filter(.x, study_id %nin% extreme_weight_observation$study_id))) %>%
  mutate(box_cox_rating_cat = map(effects_analysis, ~fit_boxcox_ratings_cat(
    .data = .x,
    outcome = box_cox_abs_deviation_score_estimate,
    outcome_var = box_cox_var,
    interceptless = FALSE
  )))
## 
## ── Fitting lmer with categorical ratings predictor on box_cox_transformed outcomes ──
## 
## boundary (singular) fit: see help('isSingular')

Note, not shown here yet. But when only the top most case is removed, lme4 gives singular fit warning, when I run check_singularity() on the model, it seems OK. When the top two cases are removed, singular fit warning is given by lme4, and check_singularity() returns FALSE, meaning the model is singular. Also, when I check convergence on the model with top 2 cases removed, gradient shifts from ~3.79e^-5 with only case with max weight removed to 0. Seems strange to me that this would occur when both extreme weights are removed… Anyways, I progress with refitting and plotting after removing cases with weights 10000000000 and above (here, there are two). Recreate the plot data for regenerating plots after refitting models

refitted_plot_data<-
  refitted_eucalyptus_model %>% 
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  select(exclusion_set, 
         dataset, 
         estimate_type, 
         box_cox_rating_cat, 
         lambda) %>% 
  mutate(predictor_means = 
           map(box_cox_rating_cat, modelbased::estimate_means),
         model_data = map(box_cox_rating_cat, ~pluck(.x, "frame") %>% 
                            drop_na() %>% 
                            as_tibble()),
         plot_name = paste(exclusion_set, dataset, sep = ", ")) %>%
  mutate(model_data = map(model_data, 
                          .f = ~ mutate(.x, PublishableAsIs =
                                          str_replace(PublishableAsIs,
                                                      "publishable with ", "") %>%
                                          str_replace("deeply flawed and ", "") %>% 
                                          capwords())),
         predictor_means = map(predictor_means,
                               .f = ~ mutate(.x, PublishableAsIs =
                                               str_replace(PublishableAsIs,
                                                           "publishable with ", "") %>%
                                               str_replace("deeply flawed and ", "") %>% 
                                               capwords()))) %>% 
  mutate(plot_name = 
           str_remove(plot_name, "complete, ") %>% 
           str_replace_all(., " ", "_") %>% 
           paste0("_violin_plot"))
## We selected `at = c("PublishableAsIs")`.

Create violin plots for refitted models

refitted_plot_data %>% 
  pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
       .f = ~ plot_model_means_box_cox_cat(..1, 
                                           PublishableAsIs, 
                                           ..2,
                                           new_order = 
                                             c("Unpublishable",
                                               "Major Revision",
                                               "Minor Revision",
                                               "Publishable As Is"),
                                           ..3, ..4, back_transform = FALSE))
## [[1]]

Regenerate plot but back-transform to original scale (absolute deviation from meta-analytic mean)

refitted_plot_data %>% 
  pmap(.l = list(.$model_data, .$predictor_means, .$plot_name, .$lambda),
       .f = ~ plot_model_means_box_cox_cat(..1, 
                                           PublishableAsIs, 
                                           ..2,
                                           new_order = 
                                             c("Unpublishable",
                                               "Major Revision",
                                               "Minor Revision",
                                               "Publishable As Is"),
                                           ..3, ..4, back_transform = TRUE))
## [[1]]

Create orchard-style plots for refitted models:

refitted_plot_data %>% 
  mutate(plot_name = str_replace(plot_name, "_violin_plot", "_orchard_plot")) %>%
  pmap(.l = list(.$model_data, .$predictor_means, .$plot_name),
       .f = ~ plot_model_means_orchard(..1, 
                                       PublishableAsIs, 
                                       ..2,
                                       new_order = 
                                         c("Unpublishable",
                                           "Major Revision",
                                           "Minor Revision",
                                           "Publishable As Is"),
                                       ..3))
## [[1]]

What is causing these extreme values observed in extreme_weight_observation? Weights seem correlated with the box-cox transformed deviation score too

ManyEcoEvo_results %>%
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All",
         dataset == "eucalyptus") %>% 
  select(effects_analysis) %>% 
  unnest(effects_analysis) %>% 
  filter(study_id %in% extreme_weight_observation$study_id) %>%
  select(study_id, 
         Zr,
         VZr,
         abs_deviation_score_estimate,
         box_cox_abs_deviation_score_estimate, 
         folded_mu_val,
         folded_v_val,
         box_cox_var,
         lambda) %>% 
  mutate(weights = 1/box_cox_var) %>% knitr::kable()
study_id Zr VZr abs_deviation_score_estimate box_cox_abs_deviation_score_estimate folded_mu_val folded_v_val box_cox_var lambda weights
Brooklyn-2-2-1 -4.4681193 0.0086957 4.3758768 1.476168 4.3758768 0.0086957 0 5.58e-05 708161001050
Cassilis-1-1-1 0.2361069 0.0030038 0.3283493 -1.113643 0.3283493 0.0030038 0 5.58e-05 11545920175
ManyEcoEvo_results %>%
  ungroup %>% 
  filter(exclusion_set == "complete", 
         publishable_subset == "All",
         expertise_subset == "All") %>% 
  select(effects_analysis) %>% 
  unnest(effects_analysis) %>% 
  unnest(review_data) %>% 
  mutate(weights = 1/box_cox_var) %>% 
  write_csv("extreme_weight_analysis_info.csv")

The weights are calculated as the inverse of the variance of the box-cox transformed absolute deviation scores. The variance of the box-cox transformed absolute deviation scores box_cox_var is calculated as the variance of the folded absolute deviation scores and the folded variance of the absolute deviation scores The folded absolute deviation scores folded_mu and the The folded variance of the absolute deviation scores folded_v are calculated as follows:

box_cox_transform
## function(data, dataset) {
##   if(rlang::is_na(data) | rlang::is_null(data)){
##     cli::cli_alert_warning(text =  glue::glue("Cannot box-cox transform data for", 
##                                               paste(names(dplyr::cur_group()), 
##                                                     dplyr::cur_group(), 
##                                                     sep = " = ", 
##                                                     collapse = ", ")))
##     result <- NA
##   } else{
##     cli::cli_h2(glue::glue("Box-cox transforming absolute deviation scores for ", {dataset}))
## 
##     box_cox_recipe <- recipes::recipe(~., 
##                                       data = select(data, 
##                                                     starts_with("abs_deviation_score_"))) %>% 
##       timetk::step_box_cox(everything(),limits = c(0,10)) %>% 
##       recipes::prep(training = data, retain = TRUE) #estimate lambda + box cox transform vars
##     
##     if(box_cox_recipe %>% 
##        recipes::tidy(number = 1) %>% nrow() > 0){
##       lambda <- box_cox_recipe %>% 
##         recipes::tidy(number = 1) %>% 
##         pull(., lambda) %>% 
##         `names<-`(., pull(box_cox_recipe %>% 
##                             recipes::tidy(number = 1), terms))
##       
##       if(!is.null(dataset)){
##         cli::cli_alert_info(c(
##           "Optimised Lambda used in Box-Cox Transformation of ",
##           "{dataset} dataset variables ",
##           "is {round(lambda, 4)} for `{names(lambda)}`."
##         ))
##       }
##       
##       variance_box_cox <- function(folded_mu, folded_v, lambda){
##         variance_bc <- folded_v*(lambda*folded_mu^(lambda-1))^2 # delta method
##         return(variance_bc)
##       }
##       
##       # folded abs_dev_score
##       folded_mu <- function(abs_dev_score, VZr) {
##         mu <- abs_dev_score
##         sigma <- sqrt(VZr)
##         fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
##         fold_mu
##       }
##       
##       # folded VZr
##       folded_v <- function(abs_dev_score, VZr) {
##         mu <- abs_dev_score
##         sigma <- sqrt(VZr)
##         fold_mu <- sigma * sqrt(2/pi) * exp((-mu^2)/(2 * sigma^2)) + mu * (1 - 2 * pnorm(-mu/sigma))
##         fold_se <- sqrt(mu^2 + sigma^2 - fold_mu^2)
##         # adding se to make bigger abs_dev_score
##         fold_v <- fold_se^2
##         fold_v
##       }
##       
##       Z_colname <- data %>% colnames %>% keep(., str_starts(., "Z"))
##       VZ_colname <- data %>% colnames %>% keep(., str_starts(., "VZ"))
##       result <- recipes::juice(box_cox_recipe) %>% 
##         rename_with(.fn = ~ paste0("box_cox_", .x)) %>% 
##         bind_cols(data, .) %>% 
##         mutate(folded_mu_val = folded_mu(abs_deviation_score_estimate, !!as.name(VZ_colname)),
##                folded_v_val = folded_v(abs_deviation_score_estimate, !!as.name(VZ_colname)),
##                box_cox_var = variance_box_cox(folded_mu_val, folded_v_val, lambda[[1]]),
##                lambda = lambda[[1]])
##       
##     } else{
##       result <- NA
##       cli::cli_alert_warning(text =  glue::glue("Lambda cannot be computed."))
##     }
##     
##     
##   }
##   
##   return(result)
##   
## }
## <bytecode: 0x1247db970>
## <environment: namespace:ManyEcoEvo>

Systematically Comparing Different Weights & Random Effects

So it seems we need to reconsider different weight options, but this might mean we need to reconsider returning back to the planned random effects structure, too

filter_args = rlang::exprs(exclusion_set == "complete", 
                           publishable_subset == "All",
                           expertise_subset == "All",
                           collinearity_subset == "All")

prepare_ratings_data <- function(effects_analysis){
  data_tbl <-
    effects_analysis %>% 
    unnest(cols = c(review_data)) %>% 
    select(study_id, 
           TeamIdentifier,
           starts_with("box_cox_"),
           ReviewerId, 
           PublishableAsIs,
           # lambda,
           folded_v_val) %>% 
    ungroup() %>% 
    mutate(PublishableAsIs = forcats::fct_relevel(PublishableAsIs,c("deeply flawed and unpublishable", 
                                                                    "publishable with major revision", 
                                                                    "publishable with minor revision", 
                                                                    "publishable as is")),
           obs_id = 1:n()) 
  return(data_tbl)
}

# create formulas

base_formula <- rlang::new_formula(expr(box_cox_abs_deviation_score_estimate), 
                                   expr(PublishableAsIs))

calc_inv_bc_var <- as_function(~ 1/pull(.x, box_cox_var))
calc_inv_folded_v <- as_function(~ 1/pull(.x, folded_v_val))
no_weights <- NA

weight_formulas <- list(no_weights,
                        calc_inv_bc_var,
                        calc_inv_folded_v
) %>% 
  set_names("no_weights",
            "inv_bc_var",
            "inv_folded_v")

RE_rev <- expr((1 | ReviewerId))
RE_study <- expr((1 | study_id))
RE_both <- expr(!!RE_rev + !!RE_study)

random_expressions <- list(
  RE_rev,
  RE_study,
  RE_both
) %>% set_names("RE_rev",
                "RE_study",
                "RE_both")

lmer_wrap <- function(data_tbl, random_effect, weight_form, ..., env = caller_env()){
  f <- rlang::new_formula(expr(box_cox_abs_deviation_score_estimate), 
                          expr(PublishableAsIs + !!random_effect), env = env)
  
  weights <- if(is_na(weight_form)) NULL else weight_form(data_tbl)
  
  
  inject(lme4::lmer(!!f,
                    data = data_tbl,
                    weights = weights, ...))
}

all_models <-
  ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(!!!filter_args) %>% 
  select(dataset, effects_analysis) %>%
  hoist(.col = effects_analysis,
        "lambda",
        .simplify = TRUE,
        .transform = unique) %>% 
  mutate(model_data = map(effects_analysis, prepare_ratings_data),.keep = "unused") %>% 
  expand_grid(expand_grid(weight_formulas, random_expressions) %>% 
                mutate(weight_forms = names(weight_formulas),
                       random_effect = names(random_expressions)) %>% 
                unite("model_spec", weight_forms, random_effect, sep = "_") ) %>% 
  mutate(model = pmap(list(model_data, random_expressions, weight_formulas), lmer_wrap),.keep = "unused") %>% 
  mutate(singularity = map_lgl(model, performance::check_singularity),
         convergence = map_lgl(model, performance::check_convergence))
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Warning: There were 16 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = pmap(list(model_data, random_expressions,
##   weight_formulas), lmer_wrap)`.
## Caused by warning in `checkConv()`:
## ! Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 15 remaining warnings.
estimate_means <- 
  all_models %>% 
  filter(singularity == F, convergence == T) %>%
  reframe(model = set_names(model, model_spec), 
          results = map(model, possibly(modelbased::estimate_means, otherwise = NULL), at = "PublishableAsIs"),
          results = set_names(results, dataset), dataset = dataset, model_spec = model_spec) %>% 
  rowwise() %>% 
  drop_na(results) # model means couldn't be estimated due to convergence issues, drop those models
## Warning: There were 20 warnings in `reframe()`.
## The first warning was:
## ℹ In argument: `results = map(...)`.
## Caused by warning:
## ! Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 19 remaining warnings.
# evaluate and compare performance for remaining models
model_comparison_results <-
  all_models %>% semi_join(estimate_means, by = join_by(dataset, model_spec)) %>% 
  group_by(dataset) %>% 
  summarise(model = set_names(model, model_spec) %>% list, 
            results = map(model, performance::compare_performance, rank = T), 
            results = set_names(results, unique(dataset)), .groups = "keep")
## Some of the nested models seem to be identical and probably only vary in
##   their random effects.
## Some of the nested models seem to be identical and probably only vary in
##   their random effects.
model_comparison_results %>% pull(results) %>% map(knitr::kable)

$blue tit

Name Model R2_conditional R2_marginal ICC RMSE Sigma AIC_wt AICc_wt BIC_wt Performance_Score
no_weights_RE_rev lmerMod 0.0906698 0.0074686 0.0838273 0.4827981 0.4979737 1 1 1 1.0000000
inv_folded_v_RE_rev lmerMod 0.0004711 0.0000514 0.0004198 0.5026704 13.1865065 0 0 0 0.1000229
inv_bc_var_RE_rev lmerMod 0.0007673 0.0001311 0.0006363 0.5822505 11.8297761 0 0 0 0.0154436

$eucalyptus

Name Model R2_conditional R2_marginal ICC RMSE Sigma AIC_wt AICc_wt BIC_wt Performance_Score
inv_bc_var_RE_study lmerMod 0.9997595 0.0000000 0.9997595 0.000000 8.240200e-03 1 1 1 0.8750000
no_weights_RE_rev lmerMod 0.1318616 0.0124202 0.1209435 1.017341 1.060566e+00 0 0 0 0.3217924
inv_folded_v_RE_rev lmerMod 0.0005661 0.0000182 0.0005479 1.126659 1.968092e+01 0 0 0 0.1563435
inv_bc_var_RE_rev lmerMod 0.0000000 0.0000000 0.0000000 1.499387 4.681748e+04 0 0 0 0.0000000
model_comparison_plots <- 
  model_comparison_results %>% 
  pull(results, "dataset") %>% map(plot)


model_comparison_plots[[1]] + ggtitle(names(model_comparison_plots)[[1]])

model_comparison_plots[[2]] + ggtitle(names(model_comparison_plots)[[2]])

note that the Performance scores are relative to best performing model for each dataset

model_means_results <- 
  estimate_means %>% 
  left_join(model_comparison_results %>% select(-model) %>% unnest(results), by = join_by("dataset", "model_spec" == "Name")) %>%
  mutate(label = paste(dataset, model_spec, sep = ".")) %>% 
  arrange(dataset, desc(Performance_Score)) %>% 
  select(Performance_Score, dataset, model_spec, results) %>% 
  mutate(label = paste(dataset, model_spec, sep = ".")) 

BT model means

model_means_results %>% 
  filter(dataset == "blue tit") %>% 
  pull(results, name = model_spec) %>% 
  map(., plot, at = "PublishableAsIs") %>% 
  map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +  
         see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1))) 
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## $no_weights_RE_rev

## 
## $inv_folded_v_RE_rev

## 
## $inv_bc_var_RE_rev

model_means_results %>% 
  pull(results, name = label) %>% 
  map(knitr::kable)

$blue tit.no_weights_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -1.108483 0.1126669 -1.330045 -0.8869225
publishable with major revision -1.299960 0.0477652 -1.394240 -1.2056797
publishable with minor revision -1.300669 0.0396926 -1.379313 -1.2220256
publishable as is -1.240809 0.0695605 -1.377695 -1.1039236

$blue tit.inv_folded_v_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -1.395268 0.1416273 -1.672852 -1.117683
publishable with major revision -1.480845 0.0649463 -1.608137 -1.353552
publishable with minor revision -1.428534 0.0532131 -1.532830 -1.324239
publishable as is -1.180024 0.0846843 -1.346003 -1.014046

$blue tit.inv_bc_var_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -0.9522328 0.1183777 -1.1842488 -0.7202168
publishable with major revision -1.1493970 0.0587054 -1.2644574 -1.0343366
publishable with minor revision -1.0493725 0.0500971 -1.1475611 -0.9511839
publishable as is -0.7160750 0.0671234 -0.8476345 -0.5845154

$eucalyptus.inv_bc_var_RE_study

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -2.611111 0.0620347 -2.734914 -2.487308
publishable with major revision -2.611111 0.0620347 -2.734914 -2.487308
publishable with minor revision -2.611111 0.0620347 -2.734914 -2.487308
publishable as is -2.611111 0.0620347 -2.734914 -2.487308

$eucalyptus.no_weights_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -2.657022 0.2702666 -3.190059 -2.123985
publishable with major revision -2.366884 0.1248189 -2.613713 -2.120055
publishable with minor revision -2.646360 0.1093437 -2.863306 -2.429414
publishable as is -2.604804 0.1691851 -2.938768 -2.270839

$eucalyptus.inv_folded_v_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -3.312195 0.2585561 -3.818956 -2.805434
publishable with major revision -2.968746 0.1253142 -3.214357 -2.723135
publishable with minor revision -3.016617 0.1085725 -3.229415 -2.803819
publishable as is -3.084911 0.1624810 -3.403368 -2.766454

$eucalyptus.inv_bc_var_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -0.5121906 0.3861934 -1.269116 0.2447346
publishable with major revision -1.0030146 0.2980537 -1.587189 -0.4188402
publishable with minor revision -2.2246690 0.2987831 -2.810273 -1.6390648
publishable as is -2.7559003 0.3383141 -3.418984 -2.0928169
  • inv_bc_var model mean getting pulled towards 0
  • inv_folded_v, better, but model mean getting pulled more negatively
  • no weights seems like most appropriate option

Eucalyptus model means

model_means_results %>% 
  filter(dataset == "eucalyptus") %>% 
  pull(results, name = model_spec) %>% 
  map(., plot, at = "PublishableAsIs") %>% 
  map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +  
         see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1))) 
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## $inv_bc_var_RE_study

## 
## $no_weights_RE_rev

## 
## $inv_folded_v_RE_rev

## 
## $inv_bc_var_RE_rev

  • inv_bc_var model mean getting pulled towards 0, quite extreme for Eucalyptus, especially for poorly rated analyses. Only in case when random effect included for ReviewerID
  • when RE for study id substituted, model means seem more fitting to data, but uncertainty seems artificially small given spread of data.
  • inv_folded_v, better, but model mean getting pulled negatively, just ilke for BT analyses
  • no weights seems like most appropriate option here, but hard to say since could not include desired random effects structure (+ study ID)

And how does accounting for rm highly collinear analyses affect the model performance?

filter_args <- discard_at(filter_args, length(filter_args)) %>% 
  list(expr(dataset == "blue tit")) %>% list_flatten()

bt_co_v_all <- 
  ManyEcoEvo_results %>% 
  ungroup %>% 
  filter(!!!filter_args) %>% 
  select(dataset, collinearity_subset, effects_analysis) %>%
  mutate(model_data = map(effects_analysis, prepare_ratings_data),.keep = "unused") %>% 
  expand_grid(expand_grid(weight_formulas, random_expressions) %>% 
                mutate(weight_forms = names(weight_formulas),
                       random_effect = names(random_expressions)) %>% 
                unite("model_spec", weight_forms, random_effect, sep = "_") ) %>% 
  mutate(model = pmap(list(model_data, random_expressions, weight_formulas), lmer_wrap),.keep = "unused") %>% 
  mutate(singularity = map_lgl(model, performance::check_singularity),
         convergence = map_lgl(model, performance::check_convergence))
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## Warning: There were 17 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = pmap(list(model_data, random_expressions,
##   weight_formulas), lmer_wrap)`.
## Caused by warning in `checkConv()`:
## ! Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 16 remaining warnings.
bt_co_v_reduced <- 
  bt_co_v_all %>% 
  filter(singularity == F, convergence == T) %>%
  group_by(collinearity_subset, model_spec) %>% 
  reframe(model = set_names(model, model_spec), 
          results = map(model, possibly(modelbased::estimate_means, otherwise = NULL), at = "PublishableAsIs"),
          results = set_names(results, collinearity_subset), collinearity_subset,  model_spec) %>% 
  rowwise() %>% 
  drop_na(results)
## Warning: There were 19 warnings in `reframe()`.
## The first warning was:
## ℹ In argument: `results = map(...)`.
## ℹ In group 1: `collinearity_subset = "All"` and `model_spec =
##   "inv_bc_var_RE_rev"`.
## Caused by warning:
## ! Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 18 remaining warnings.
bt_perf <- 
  bt_co_v_all %>% semi_join(bt_co_v_reduced, by = join_by(collinearity_subset, model_spec)) %>% 
  group_by(collinearity_subset) %>% 
  summarise(model = set_names(model, model_spec) %>% list, 
            .groups = "keep") %>% 
  mutate( results = map(model, performance::compare_performance, rank = T), 
          results = set_names(results, unique(collinearity_subset)))
## Some of the nested models seem to be identical and probably only vary in
##   their random effects.
## Some of the nested models seem to be identical and probably only vary in
##   their random effects.
bt_perf %>% 
  pull(results, "collinearity_subset") %>% 
  map(plot)
## $All

## 
## $collinearity_removed

bt_perf %>% pull(results) %>% map(knitr::kable)

$All

Name Model R2_conditional R2_marginal ICC RMSE Sigma AIC_wt AICc_wt BIC_wt Performance_Score
no_weights_RE_rev lmerMod 0.0906698 0.0074686 0.0838273 0.4827981 0.4979737 1 1 1 1.0000000
inv_folded_v_RE_rev lmerMod 0.0004711 0.0000514 0.0004198 0.5026704 13.1865065 0 0 0 0.1000229
inv_bc_var_RE_rev lmerMod 0.0007673 0.0001311 0.0006363 0.5822505 11.8297761 0 0 0 0.0154436

$collinearity_removed

Name Model R2_conditional R2_marginal ICC RMSE Sigma AIC_wt AICc_wt BIC_wt Performance_Score
no_weights_RE_rev lmerMod 0.1366284 0.0114632 0.1266166 0.2828712 0.2947137 0.0587685 0.0587685 0.0587685 0.6484142
inv_bc_var_RE_rev lmerMod 0.0004195 0.0000167 0.0004028 0.3020777 5.0206226 0.9412315 0.9412315 0.9412315 0.4124799
inv_folded_v_RE_rev lmerMod 0.0005121 0.0000124 0.0004997 0.3022892 6.9375939 0.0000000 0.0000000 0.0000000 0.0001809
bt_mod_means_res <- 
  bt_co_v_reduced  %>% 
  left_join(bt_perf %>% select(-model) %>% unnest(results), by = join_by("collinearity_subset", "model_spec" == "Name")) %>%
  mutate(label = paste(collinearity_subset, model_spec, sep = ".")) %>% 
  arrange(collinearity_subset, desc(Performance_Score)) %>% 
  select(Performance_Score, collinearity_subset, model_spec, results) %>% 
  mutate(label = paste(collinearity_subset, model_spec, sep = ".")) 
bt_mod_means_res %>% 
  pull(results, name = label) %>% 
  map(knitr::kable)

$All.no_weights_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -1.108483 0.1126669 -1.330045 -0.8869225
publishable with major revision -1.299960 0.0477652 -1.394240 -1.2056797
publishable with minor revision -1.300669 0.0396926 -1.379313 -1.2220256
publishable as is -1.240809 0.0695605 -1.377695 -1.1039236

$All.inv_folded_v_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -1.395268 0.1416273 -1.672852 -1.117683
publishable with major revision -1.480845 0.0649463 -1.608137 -1.353552
publishable with minor revision -1.428534 0.0532131 -1.532830 -1.324239
publishable as is -1.180024 0.0846843 -1.346003 -1.014046

$All.inv_bc_var_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -0.9522328 0.1183777 -1.1842488 -0.7202168
publishable with major revision -1.1493970 0.0587054 -1.2644574 -1.0343366
publishable with minor revision -1.0493725 0.0500971 -1.1475611 -0.9511839
publishable as is -0.7160750 0.0671234 -0.8476345 -0.5845154

$collinearity_removed.no_weights_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -0.9413372 0.0715214 -1.081986 -0.8006889
publishable with major revision -1.0699941 0.0316932 -1.132567 -1.0074213
publishable with minor revision -1.0952845 0.0263067 -1.147430 -1.0431388
publishable as is -1.0858540 0.0457896 -1.175984 -0.9957241

$collinearity_removed.inv_bc_var_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -0.9801828 0.0633910 -1.104427 -0.8559387
publishable with major revision -1.0229691 0.0294880 -1.080764 -0.9651738
publishable with minor revision -1.0134392 0.0228057 -1.058138 -0.9687408
publishable as is -0.9607954 0.0394182 -1.038054 -0.8835371

$collinearity_removed.inv_folded_v_RE_rev

PublishableAsIs Mean SE CI_low CI_high
deeply flawed and unpublishable -1.178058 0.0776523 -1.330254 -1.025863
publishable with major revision -1.207697 0.0371832 -1.280575 -1.134820
publishable with minor revision -1.192916 0.0300419 -1.251797 -1.134035
publishable as is -1.129077 0.0506755 -1.228399 -1.029755
bt_mod_means_res %>% 
  pull(results, name = label) %>% 
  map(., plot, at = "PublishableAsIs") %>% 
  map2(., names(.), ~ .x + labs(subtitle = as_label(.y), title = NULL) +  
         see::theme_lucid() + theme(axis.text.x = element_text(angle = 50, hjust = 1)))
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.
## Warning: Could not recover model data from environment. Please make sure your
##   data is available in your workspace.
##   Trying to retrieve data from the model frame now.

\(All.no_weights_RE_rev <img src="investigate_weights_files/figure-html/unnamed-chunk-23-1.png" width="672" />\)All.inv_folded_v_RE_rev \(All.inv_bc_var_RE_rev <img src="investigate_weights_files/figure-html/unnamed-chunk-23-3.png" width="672" />\)collinearity_removed.no_weights_RE_rev \(collinearity_removed.inv_bc_var_RE_rev <img src="investigate_weights_files/figure-html/unnamed-chunk-23-5.png" width="672" />\)collinearity_removed.inv_folded_v_RE_rev